// Start with nothing in memory, turn off output paging (the setting is
// stored permanently), and run everything relative to the project data folder.
clear
set more off, permanently
cd "/Users/zachbrown/Projects/PriceTransparency/Data/"

// Use medical claims: first and last year of extracts to read, and the
// variables retained from each yearly file.
global year_min 2005
global year_max 2014
global keep_vars "serv_prov_key serv_prov_cw_key year"

        // Stack the variables in $keep_vars from every yearly claims extract
        // between $year_min and $year_max.
        //
        // Each extract is decompressed to a scratch file before being appended.
        // The scratch file is declared once and erased after every append:
        // calling -tempfile- inside the loop would allocate a new file per year,
        // and Stata only removes temporary files when the do-file finishes, so
        // all decompressed years would otherwise sit on disk at the same time.
        tempfile tmpdata
        forval yr = $year_min(1)$year_max {
            disp "year: `yr'"
            // -c writes the decompressed data to stdout; the option is placed
            // before the file name and the redirect target is quoted so the
            // command does not depend on option permutation or a space-free
            // temporary directory.
            shell nice gunzip -c Raw/med_clm/med_clm_`yr'_clean.dta > "`tmpdata'"
            append using "`tmpdata'", keep($keep_vars)
            // Free the decompressed copy immediately; it is recreated by the
            // next iteration's redirect.
            erase "`tmpdata'"
        }


// Reduce the stacked claims to one row per serv_prov_key.
//
// -contract- leaves one row per (serv_prov_cw_key, serv_prov_key) pair with the
// number of claim rows in n. When a serv_prov_key appears with more than one
// serv_prov_cw_key, keep the pair seen most often (ties broken by the smallest
// serv_prov_cw_key) instead of whichever row happens to come first, which is
// what -duplicates drop ..., force- would retain without consulting n.
// NOTE(review): this changes the retained serv_prov_cw_key for provider keys
// with conflicting mappings — confirm that "most frequent" is the intended rule.
contract serv_prov_cw_key serv_prov_key, freq(n)
gen double neg_n = -n
bysort serv_prov_key (neg_n serv_prov_cw_key): keep if _n == 1
drop neg_n

// Merge on provider dataset
rename serv_prov_key prov_key
merge 1:1 prov_key using Raw/ref_tables/provider_detail.dta

// Among provider keys found in both sources, tabulate whether the crosswalk key
// from the claims equals the one from the provider table. Matched rows are
// selected with _merge==3 directly: -encode- numbers its categories in
// alphabetical order of the labels actually present, so a test on source==1
// would only mean "In both" when at least one matched row exists and would
// silently tabulate a different group otherwise.
gen same = (serv_prov_cw_key==prov_cw_key)
tab same if _merge==3
drop same

// Labelled indicator of where each provider key was found.
gen sourcet = "In claims" if _merge==1
replace sourcet = "In prov detail" if _merge==2
replace sourcet = "In both" if _merge==3
encode sourcet, gen(source)
drop _merge sourcet

// Rename: suffix 1 is the crosswalk key taken from the claims, suffix 2 the
// one that came from the provider table.
rename serv_prov_cw_key prov_cw_key1
rename prov_cw_key prov_cw_key2

// Consecutive integer identifiers for each distinct npi and prvtaxid value.
egen npi_id = group(npi)
egen prvtax_id = group(prvtaxid)

// Keep only the crosswalk variables, in this order, and shrink storage types.
local final_vars prov_key prov_cw_key1 prov_cw_key2 npi_id prvtax_id
keep `final_vars'
order `final_vars'
compress

save "build/prov_cw_crosswalk.dta", replace

